package translate

import (
	"crawler_translation/utils"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
	"regexp"
	"strings"
)

/**
Translation: helpers that translate the text content of HTML while keeping the markup intact.
*/

// Translate translates the text content of HTML documents; HtmlToEn is the
// entry point.
type Translate struct {
	// stringFilterArr lists text segments that are passed through untranslated.
	// HtmlToEn turns it into a set and String2English skips any segment that
	// equals one of the entries exactly.
	stringFilterArr []string
}

// NewTranslate returns a Translate whose skip list is pre-populated with the
// default segments that must never be sent to the translator (the empty
// string, a single space, the "[占位]" marker and a few known fragments).
func NewTranslate() *Translate {
	t := new(Translate)
	t.stringFilterArr = []string{
		"",
		"&ZeroWidthSpace;",
		"en\",",
		" ",
		"[占位]",
		".",
	}
	return t
}

// HtmlToEn translates the text content of an HTML document while leaving the
// markup untouched. Despite its name, the text is translated by
// TranslateEn2Ch, which requests zh-cn as the target language.
//
// Processing steps:
//  1. html is passed through utils.RmString together with the code point 8203
//     (U+200B, zero width space) — presumably to strip that rune; confirm
//     against the helper.
//  2. Every <code>…</code> element is swapped for the marker "[占位]" so its
//     content never reaches the translator.
//  3. Every remaining tag is swapped for the marker "[替换]"; the tags are
//     remembered in document order.
//  4. String2English translates the text between the markers and re-inserts
//     the tags.
//  5. The <code> elements are restored and every occurrence of the fragment
//     `en",` is removed from the result.
//
// When translation fails the returned string is empty and err is the error
// reported by String2English.
func (t *Translate) HtmlToEn(html string) (retHtml string, err error) {
	// Set of segments that must be passed through verbatim.
	skip := make(map[string]struct{}, len(t.stringFilterArr))
	for _, entry := range t.stringFilterArr {
		skip[entry] = struct{}{}
	}

	// 8203 == U+200B (zero width space).
	html = utils.RmString(html, map[int]struct{}{8203: {}})

	// Hide <code> elements before looking at the remaining tags.
	withoutCode, codeBlocks := t.FilterCode(html)

	tagRe := regexp.MustCompile(`<[^>]+>`)
	tags := tagRe.FindAllString(withoutCode, -1)
	textOnly := tagRe.ReplaceAllLiteralString(withoutCode, "[替换]")

	translated, err := t.String2English(textOnly, tags, skip)
	if err != nil {
		return "", err
	}

	// Put the <code> elements back, one per marker, in document order.
	restored := t.ReplaceAllString(translated, codeBlocks, `\[占位\]`)

	// Final clean-up of a known stray fragment.
	cleaned := t.FilterString(restored, map[string]string{
		"en\",": "",
	})
	return cleaned, nil
}

// FilterString returns str with every occurrence of each key of replace
// substituted by the corresponding value. A nil or empty map leaves str
// unchanged.
//
// The pairs are applied one after another in map iteration order, which Go
// does not define; with more than one pair the result is only deterministic
// when the pairs do not interact (no key matches text produced or consumed by
// another pair).
func (t *Translate) FilterString(str string, replace map[string]string) (nowStr string) {
	nowStr = str
	for old, repl := range replace {
		nowStr = strings.Replace(nowStr, old, repl, -1)
	}
	return nowStr
}

// String2English translates the text segments of str and re-inserts the tags
// that were previously swapped for the marker "[替换]".
//
// str is split on "[替换]": segment 0 is the text in front of the first tag
// and segment i (i >= 1) is the text that follows posMap[i-1]. The output is
// segment 0, then each tag followed by its segment, in order.
//
// A segment is copied through unchanged when it is, verbatim, a key of
// stringFilter. Otherwise it is split on the "[占位]" marker (which stands for
// a hidden <code> element) and each fragment is translated with
// TranslateEn2Ch after its newlines have been removed; the markers are kept
// in place. Fragments that are empty or consist only of white space contain
// nothing to translate and are kept as they are, which also avoids a
// pointless request to the translation service.
//
// An error is returned when str contains more "[替换]" markers than posMap has
// tags (for example because the marker occurs literally in the text) or when
// the translator fails. On error the returned string is empty.
func (t *Translate) String2English(str string, posMap []string, stringFilter map[string]struct{}) (retHtml string, err error) {
	const (
		tagMarker  = "[替换]"
		codeMarker = "[占位]"
	)

	segments := strings.Split(str, tagMarker)
	if markers := len(segments) - 1; markers > len(posMap) {
		// Indexing posMap below would run out of range.
		return "", fmt.Errorf("translate: found %d tag markers but only %d tags to restore", markers, len(posMap))
	}

	var out strings.Builder
	for i, segment := range segments {
		// Segment 0 precedes the first tag; every later segment follows one.
		if i > 0 {
			out.WriteString(posMap[i-1])
		}

		if _, skip := stringFilter[segment]; !skip {
			// Splitting a segment without code markers yields the segment
			// itself, so one code path covers both cases.
			parts := strings.Split(segment, codeMarker)
			for j, part := range parts {
				if strings.TrimSpace(part) == "" {
					continue
				}
				translated, trErr := t.TranslateEn2Ch(t.FilterString(part, map[string]string{
					"\n": "",
				}))
				if trErr != nil {
					return "", trErr
				}
				parts[j] = translated
			}
			segment = strings.Join(parts, codeMarker)
		}

		out.WriteString(segment)
	}
	return out.String(), nil
}

// FilterCode hides <code> elements from the translator. It returns html with
// every match of `<code…>…</code>` (attributes and multi-line content
// included, matched non-greedily) replaced by the marker "[占位]", together
// with the removed elements in document order. The slice is nil when html
// contains no such element.
func (t *Translate) FilterCode(html string) (string, []string) {
	codeRe := regexp.MustCompile(`<code([\s\S]*?)>([\s\S]*?)</code>`)

	blocks := codeRe.FindAllString(html, -1)
	masked := codeRe.ReplaceAllLiteralString(html, "[占位]")

	return masked, blocks
}

// ReplaceAllString replaces the matches of the regular expression expr in str
// with the entries of moreReplace, in order: the first match receives
// moreReplace[0], the second moreReplace[1], and so on. Replacements are
// inserted literally.
//
// When there are more matches than replacements the surplus matches are left
// unchanged; surplus replacements are ignored.
//
// expr must be a valid regular expression; the function panics otherwise
// (regexp.MustCompile).
func (t *Translate) ReplaceAllString(str string, moreReplace []string, expr string) string {
	re := regexp.MustCompile(expr)

	next := 0
	return re.ReplaceAllStringFunc(str, func(match string) string {
		if next >= len(moreReplace) {
			// Nothing left to substitute: keep the matched text.
			return match
		}
		repl := moreReplace[next]
		next++
		return repl
	})
}

// TranslateEn2Ch translates text to Simplified Chinese using the Google
// translate endpoint at clients5.google.com (source language auto-detected,
// target language zh-cn).
//
// The response body is decoded as a JSON array of string arrays and the first
// string of the first array is returned; if the decoded array or its first
// element is empty, the result is the empty string.
//
// Empty text is returned as is without contacting the service. An error is
// returned when the request fails, the service answers with a status other
// than 200 OK, the body cannot be read, or the body does not have the
// expected JSON shape.
//
// NOTE: the request goes through http.Get and therefore http.DefaultClient,
// which has no timeout.
func (t *Translate) TranslateEn2Ch(text string) (string, error) {
	if text == "" {
		// Nothing to translate; skip the network round trip.
		return "", nil
	}

	urlStr := fmt.Sprintf("https://clients5.google.com/translate_a/t?client=dict-chrome-ex&sl=auto&tl=zh-cn&q=%s", url.QueryEscape(text))
	resp, err := http.Get(urlStr)
	if err != nil {
		return "", fmt.Errorf("requesting translation: %w", err)
	}
	defer resp.Body.Close()

	// An error page would otherwise surface as a confusing JSON decode error.
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("requesting translation: unexpected HTTP status %s", resp.Status)
	}

	bs, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading translation response: %w", err)
	}

	// Deserialize the returned JSON.
	var result [][]string
	if err := json.Unmarshal(bs, &result); err != nil {
		return "", fmt.Errorf("decoding translation response: %w", err)
	}

	if len(result) == 0 || len(result[0]) == 0 {
		return "", nil
	}
	return result[0][0], nil
}
